# Fetch the page source with requests
# Extract the useful information with re
import requests
import re
import csv

# Page to scrape: the Douban movie chart.
url = "https://movie.douban.com/chart"

# Browser-like User-Agent sent with the request (presumably the site rejects
# the default client User-Agent -- confirm before removing).
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# quietly producing an empty CSV.
resp.raise_for_status()
page_content = resp.text

# Parse the data: one match per <tr class="item"> row, capturing
#   name  - the value of the first title="..." attribute in the row,
#   year  - the text of <p class="pl"> up to its first "/",
#   score - the text of <span class="rating_nums">,
#   num   - the text of the following <span class="pl">.
# re.S lets "." match newlines so a single match can span several lines.
obj = re.compile(r'<tr class="item">.*?<td width="100" valign="top">.*?title="(?P<name>.*?)">'
                 r'.*?<p class="pl">(?P<year>.*?)/'
                 r'.*?<span class="rating_nums">(?P<score>.*?)</span>'
                 r'.*?<span class="pl">(?P<num>.*?)</span>', re.S)

result = obj.finditer(page_content)
# newline="" is required by the csv module; without it the writer's own
# "\r\n" terminator gets translated again on Windows, yielding a blank line
# after every row. The with-block closes the file even if writing fails.
with open("data.csv", mode="w", encoding="utf-8", newline="") as f:
    csvwrite = csv.writer(f)
    for it in result:
        dic = it.groupdict()
        # Every group in the pattern is mandatory, so all values are strings;
        # strip any whitespace/newlines the HTML layout left around them.
        csvwrite.writerow([value.strip() for value in dic.values()])
print("over!")
